/*
This code extracts calibration targets from the results of Acemoglu and Autor (2011) and Acemoglu and Restrepo (2022)
	- the 10th, 50th, and 90th percentiles of the hourly wage distribution in 1980, in logs and 2008 dollars (PCE deflator), from Acemoglu and Autor (2011)
	- the log change in wages at the 10th, 50th, and 90th percentiles of the 1980 hourly wage distribution induced by automation between 1980 and 2016, from Acemoglu and Restrepo (2022)
*/

* set working directory to the "Replication AL2025" folder
* NOTE: this path is machine-specific; adjust it before running on another computer
cd "C:/Users/jonas.loebbing/Desktop/Replication AL2025"

* load May ORG wage percentile dataset from Acemoglu and Autor (2011)
* (clear is the documented option of -use- for discarding the data currently in memory)
use "Data/Wages/AA11 results/morg-pctile-mov-1974-2008", clear

* keep 1980 values and percentile indicator
keep hrwg_perc_label hrwg_perc_val1980

* order observations by percentile so that [_n-1] and [_n+1] below refer to the adjacent percentiles
* (stable keeps the existing order among any equal labels)
sort hrwg_perc_label, stable

* percentiles 6 and 7 have the same value in 1980, correct p7 by linear interpolation
* (any value equal to the one in the preceding observation is replaced by the midpoint of its
*  two neighbours; the 1:1 merge on baseline_wage further below requires unique wage values)
replace hrwg_perc_val1980 = (hrwg_perc_val1980[_n-1] + hrwg_perc_val1980[_n+1])/2 ///
	if hrwg_perc_val1980 == hrwg_perc_val1980[_n-1]

* rename wage variable to prepare for merge
rename hrwg_perc_val1980 baseline_wage

* combine with the baseline results for the wage effects of automation from Acemoglu and Restrepo (2022),
* using the 1980 wage (baseline_wage) as the merge key
* (wages in both datasets are expressed in 2008 dollars, deflated with the PCE price index)
merge 1:1 baseline_wage using "Data/Wages/AR22 results/AR22_geresults"

* order observations by the 1980 log wage
sort baseline_wage

* assign a wage percentile to each baseline wage in the Acemoglu/Restrepo (2022) data
* by linearly interpolating the percentile label over baseline_wage
ipolate hrwg_perc_label baseline_wage, gen(hrwg_perc_label_inter)

* local polynomial regression of log wage change due to automation on baseline wage percentile
* (Gaussian kernel, polynomial degree 2, bandwidth 15, observations weighted by popwts)
* the generated variable dlnwg_smooth contains the smoothed values of the log wage change at percentiles 3 to 97,
* i.e. at the values of hrwg_perc_label passed to at()
* nograph suppresses the plot lpoly draws by default; the figures are produced explicitly further below
lpoly dlnwg hrwg_perc_label_inter [aw=popwts], ///
	kernel(gaussian) degree(2) bwidth(15) ///
	at(hrwg_perc_label) generate(dlnwg_smooth) nograph

* label variables
label variable dlnwg "Log wage change AR22"
label variable dlnwg_smooth "Smoothed log wage change"

* replicate panel D of figure 7 in Acemoglu and Restrepo (2022):
* weighted scatter of the log wage change against the 1980 log hourly wage
twoway ///
	(scatter dlnwg baseline_wage [w=popwts], msize(*.4) mcolor(%20)), ///
	xtitle("Log hourly wage in 1980" "(2008 dollars)", color(gs6) size(large)) ///
	ytitle("", size(medium)) ///
	title("Acemoglu and Restrepo (2022), Figure 7D", color(black) position(11)) ///
	saving(Output/Figures/AR22-fig7D, replace) ///
	ylabel(#8) ///
	graphregion(fcolor(white) ifcolor(white)  lcolor(white) ilcolor(white)) ///
	plotregion(fcolor(white))

* graph of smoothed function:
* raw log wage changes (weighted markers) overlaid with the smoothed values (black markers),
* both plotted against the interpolated 1980 wage percentile
twoway ///
	(scatter dlnwg hrwg_perc_label_inter [w=popwts], msize(*.4) mcolor(%20)) ///
	(scatter dlnwg_smooth hrwg_perc_label_inter, mcolor(black)), ///
	xtitle("Wage percentile in 1980", size(medium)) ///
	ytitle("Log wage change", size(medium)) ///
	saving(Output/Figures/fig-b1, replace) ///
	ylabel(#8) ///
	graphregion(fcolor(white) ifcolor(white)  lcolor(white) ilcolor(white)) ///
	plotregion(fcolor(white)) ///
	legend(position(6) cols(2))

* write the second figure to pdf as well
graph export Output/Figures/fig-b1.pdf, replace

* produce csv file with calibration inputs, restricted to observations with a non-missing percentile label
* (the if qualifier selects the rows to export and leaves the data in memory untouched)
export delimited hrwg_perc_label baseline_wage dlnwg_smooth ///
	using "Output/Wage Data/wage.csv" ///
	if !missing(hrwg_perc_label), replace

* save the generated data (all observations)
save "Output/Wage Data/wage-data", replace